import jieba
import pandas as pd
import numpy as np
from wordcloud import WordCloud
import cv2
import matplotlib.pyplot as plt


def stop_world(path=r'D:\project\comment_analysis\哈工大停用词表.txt', encoding=None):
    """Load the stop-word list, one entry per line of the file.

    Args:
        path: Location of the stop-word file. The default is written as a
            raw string so the backslashes are not treated as (invalid)
            escape sequences; its value is the same path as before.
        encoding: Text encoding passed to ``open``. ``None`` keeps the
            platform default, which is what the function always used.

    Returns:
        A list with every line of the file, stripped of surrounding
        whitespace. Blank lines are kept as empty strings.
    """
    with open(path, 'r', encoding=encoding) as f:
        # Iterate the file lazily instead of materialising readlines().
        return [line.strip() for line in f]


def cut_world(x):
    """Segment *x* with jieba and drop every token that is a stop word.

    Args:
        x: Text to segment; surrounding whitespace is stripped first.

    Returns:
        A string in which each kept token is followed by ``'|'`` (so a
        non-empty result ends with a trailing ``'|'``), or ``''`` when no
        token is kept.
    """
    # The stop-word file used to be re-read on every call, i.e. once per
    # comment. Load it once, cache it on the function object, and keep it
    # as a set so each membership test is O(1).
    stop_words = getattr(cut_world, '_stop_words', None)
    if stop_words is None:
        stop_words = cut_world._stop_words = frozenset(stop_world())

    seg = jieba.cut(x.strip(), cut_all=False)
    return ''.join(word + '|' for word in seg if word not in stop_words)


def _read_csv_warn_bad_lines(path):
    """Read *path* with pandas, skipping malformed rows with a warning."""
    try:
        # ``error_bad_lines`` was deprecated in pandas 1.3 and removed in
        # 2.0; ``on_bad_lines='warn'`` is the equivalent of the old
        # ``error_bad_lines=False`` (with its default ``warn_bad_lines``).
        return pd.read_csv(path, on_bad_lines='warn')
    except TypeError:
        # pandas < 1.3 does not accept ``on_bad_lines``.
        return pd.read_csv(path, error_bad_lines=False)


def get_result():
    """Segment the comments and write one (num_iid, token) row per token.

    Reads ``kate_commt.csv``, removes rows whose ``content`` appears in the
    ``content_feihua`` column of ``feihua.csv``, keeps the first 50000
    remaining rows, segments each comment with ``cut_world`` and writes
    the exploded result to ``result.csv`` with columns ``num_iid`` and
    ``biaoqian`` (plus the index column pandas writes by default).

    NOTE(review): ``result.csv`` is written relative to the current working
    directory, while ``get_word_img`` reads
    ``D:/project/comment_analysis/result.csv`` -- confirm the script is
    always run from that directory.
    """
    comn = _read_csv_warn_bad_lines('D:/project/comment_analysis/kate_commt.csv')
    feihua = pd.read_csv('D:/project/comment_analysis/feihua.csv', encoding='gb2312')
    feihua_list = feihua['content_feihua'].tolist()

    # Truncate before segmenting (same rows as truncating afterwards, but
    # no work is spent on rows that get discarded) and take an explicit
    # copy so the column assignment below does not write into a view.
    real_comn = comn[~comn['content'].isin(feihua_list)].iloc[:50000, :].copy()
    real_comn['cut_word'] = real_comn['content'].astype(str).apply(cut_world)
    print('real_comn')

    cuted_comn = real_comn[['num_iid', 'cut_word']].reset_index(drop=True)
    # Split the '|'-separated tokens into columns, then stack them into one
    # token per row; the outer index level still points at the source row.
    comn_1 = cuted_comn['cut_word'].astype(str).str.split('|', expand=True).stack()
    comn_1 = comn_1.reset_index(level=1, drop=True).rename('biaoqian')
    cuted_comn = cuted_comn.drop('cut_word', axis=1)
    # Joining on the index repeats num_iid once for every token of a row.
    qingxi_df = cuted_comn.join(comn_1)
    qingxi_df.to_csv('result.csv', encoding='utf-8-sig')
    print('haha')


def get_word_img():
    """Render a word cloud of the ``biaoqian`` token frequencies.

    Loads ``result.csv``, discards the ``Unnamed: 0`` column and any rows
    containing missing values, counts how often each ``biaoqian`` value
    occurs, and shows a masked word cloud of those counts with matplotlib.
    """
    tags = pd.read_csv('D:/project/comment_analysis/result.csv')
    tags = tags.drop('Unnamed: 0', axis=1)
    tags = tags.dropna()

    # Two-column frame: the token, then its number of occurrences.
    counts = tags['biaoqian'].value_counts().reset_index()
    word_frequence = {token: count for token, count in counts.values}

    mask_img = cv2.imread('D:/project/comment_analysis/bg_img.jpg')
    cloud_maker = WordCloud(
        font_path='D:/project/comment_analysis/simheittf/simhei.ttf',
        mask=mask_img,
        mode='RGBA',
        background_color='black',
        min_font_size=10,
        max_font_size=110,
    )
    rendered = cloud_maker.fit_words(word_frequence)

    plt.title('title', fontsize=16)
    plt.imshow(rendered)
    plt.axis("off")
    plt.show()


def hello_world():
    """Print the greeting ``hello world!`` to standard output."""
    greeting = 'hello world!'
    print(greeting)




